library(tidyverse) # used for data manipulation
library(rmarkdown) # used for paged_table function
library(wakefield)#to generate vector of random names
library(ggrepel)
library(EnhancedVolcano)#volcanoPlot package
Input data includes column “Log2FC” and “p.adj”.
VolcanoPlot_General <- function(Input, Omics, OutputName){
VolcanoPlot <- EnhancedVolcano (Input,
lab = paste("Input$", Omics),#Metabolite name
x = "Log2FC",#Log2FC
y = "p.adj",#p-value or q-value
xlab = "Log2FC",
ylab = bquote(~-Log[10]~`p.adj`),#(~-Log[10]~adjusted~italic(P))
pCutoff = 0.9,
FCcutoff = 0.01,#Cut off Log2FC, automatically 2
pointSize = 4,
labSize = 1,
titleLabSize = 16,
col=c("black", "grey", "grey", "red"),#if you want to change colors
colAlpha = 0.5,
title=paste(OutputName),
subtitle = paste(Omics),
caption = paste0("total = ", nrow(Input)/2, " ", Omics),
xlim = c((((Reduce(min,Input$Log2FC))))-0.5,(((Reduce(max,Input$Log2FC))))+0.5),
ylim = c(0,((-log10(Reduce(min,Input$p.adj))))+0.1),
#drawConnectors = TRUE,
#widthConnectors = 0.5,
#colConnectors = "black",
cutoffLineType = "dashed",
cutoffLineCol = "black",
cutoffLineWidth = 0.5,
legendPosition = 'right',
legendLabSize = 12,
legendIconSize = 5.0
)
ggsave(file=paste("Output_Figures/VolcanoPlot_General", OutputName, ".pdf", sep="_"), plot=VolcanoPlot, width=10, height=8)
plot(VolcanoPlot)
}
# Make an example DF:
Gene <- name(300)
Log2FC <- runif(300, min=-5, max=5)
p.adj <- runif(300, min=0, max=1)
DF_Input <- data.frame(Gene, Log2FC, p.adj, stringsAsFactors=FALSE)
# Make the plot
VolcanoPlot_General(Input=DF_Input,
Omics="Gene",
OutputName="RNAseq_01.05.2020")
Input data includes column “Log2FC” and “p.adj”
VolcanoPlot_Multiple <- function(InputData_1, Condition_1, InputData_2, Condition2, OutputPlotName){
#1. Include a column naming the set:
InputData_1[,"comparison"] <- as.character("InputData1")
InputData_2[,"comparison"] <- as.character("InputData2")
#2. Add the colour:
InputData_1[,"colour"] <- as.character("blue")
InputData_2[,"colour"] <- as.character("red")
#3. Combine the files
Combined <- rbind(InputData_1,InputData_2)
#4.Prepare new colour scheme
keyvals <- ifelse(
Combined$colour == "blue", "blue",
ifelse(Combined$colour == "red", "red",
"black"))
keyvals[is.na(keyvals)] <- 'black'
names(keyvals)[keyvals == 'blue'] <- paste(Condition_1)
names(keyvals)[keyvals == 'red'] <- paste(Condition2)
names(keyvals)[keyvals == 'black'] <- 'X'
#5. Make the Plot
VolcanoPlot <- EnhancedVolcano (Combined,
lab = Combined$Gene,
x = "Log2FC",#Log2FC
y = "p.adj",#p-value or q-value
xlab = bquote(~Log[2]~ "FC"),
ylab = bquote(~-Log[10]~p.adj),#(~-Log[10]~adjusted~italic(P))
pCutoff = 0.05,
FCcutoff = 0.5,#Cut off Log2FC, automatically 2
pointSize = 3,
labSize = 1.5,
colCustom = keyvals,
titleLabSize = 16,
colAlpha = 0.5,
title=paste(OutputPlotName),
subtitle = paste("Comparison of two conditions"),
caption = paste0("total = ", (nrow(Combined)/2),"Genes"),
xlim = c((((Reduce(min,Combined$Log2FC))))-0.5,(((Reduce(max,Combined$Log2FC))))+0.5),
ylim = c(0,(ceiling(-log10(Reduce(min,Combined$p.adj))))),
#drawConnectors = TRUE,
#widthConnectors = 0.5,
#colConnectors = "black",
cutoffLineType = "dashed",
cutoffLineCol = "black",
cutoffLineWidth = 0.5,
legendLabels=c('No changes',"-0.5< Log2FC <0.5", 'p.adj<0.05 & -0.5< Log2FC <0.5"'),
legendPosition = 'right',
legendLabSize = 12,
legendIconSize = 5.0,
gridlines.major = FALSE,
gridlines.minor = FALSE,
)
ggsave(file=paste("Output_Figures/VolcanoPlot_Multiple", OutputPlotName, ".pdf", sep="_"), plot=VolcanoPlot, width=10, height=8)
plot(VolcanoPlot)
}
# Make an example DF:
Gene <- name(300)
Log2FC <- runif(300, min=-5, max=5)
p.adj <- runif(300, min=0, max=0.9)
DF_Input_C1 <- data.frame(Gene, Log2FC, p.adj, stringsAsFactors=FALSE)
Log2FC <- runif(300, min=-2, max=9)
p.adj <- runif(300, min=0, max=0.7)
DF_Input_C2 <- data.frame(Gene, Log2FC, p.adj, stringsAsFactors=FALSE)
# Plot
VolcanoPlot_Multiple(InputData_1=DF_Input_C1,
Condition_1="60min_versus_0min",
InputData_2=DF_Input_C2,
Condition2="120min_versus_0min",
OutputPlotName="Proteomics_01.06.2020")
Input data includes column “Log2FC”, “p.adj” and “gene”.
The metabolic signatures used are based on Gaude et al and are used to plot specific metabolic pathways of interest. Of course, here any signatures used to perform enrichment analysis can be used (e.g. KEGG, BIOCARTA, REACTOME, GO-terms,…).
As discussed by Gaude et al. metabolic enzymes can be part of multiple metabolic pathways. Here, I will highlight enzymes that are unique for the pathway and enzymes that are part of multiple pathways.
library(ggrepel)
library(EnhancedVolcano)
VolcanoPlot_MetabolicPathway <- function(Input, Signature, Signature_term){
Pathway <- subset(Signature, term == paste(Signature_term))
Volcano1 <- merge(x=Pathway,y=Input, by.x="gene", by.y="Gene", all.x=TRUE)%>%
na.omit()
#Prepare the colour scheme:
keyvals <- ifelse(
Volcano1$Unique == "Unique", "red",
ifelse(Volcano1$Unique == "In multiple Pathways", "blue",
"black"))
names(keyvals)[is.na(keyvals)] <- "black"
names(keyvals)[keyvals == 'black'] <- "NA"
names(keyvals)[keyvals == 'red'] <- "Unique"
names(keyvals)[keyvals == 'blue'] <- "In multiple Pathways"
#Prepare the symbols:
keyvals.shape <- ifelse(
Volcano1$Unique == "Unique", 19,
ifelse(Volcano1$Unique == "In multiple Pathways", 18,
3))
keyvals.shape[is.na(keyvals.shape)] <- 3
names(keyvals.shape)[keyvals.shape == 3] <- 'NA'
names(keyvals.shape)[keyvals.shape == 19] <- 'Unique'
names(keyvals.shape)[keyvals.shape == 18] <- 'In multiple Pathways'
#plot
if(dim(Volcano1)>=1){
VolcanoPlot <- EnhancedVolcano (Volcano1,
lab = Volcano1$gene,#Metabolite name
x = "Log2FC",#Log2FC
y = "p.adj",#p-value or q-value
xlab = "Log2FC",
ylab = bquote(~-Log[10]~`p.adj`),#(~-Log[10]~adjusted~italic(P))
pCutoff = 0.05,
FCcutoff = 0.5,#Cut off Log2FC, automatically 2
pointSize = 3,
labSize = 1,
shapeCustom = keyvals.shape,
colCustom = keyvals,
titleLabSize = 16,
col=c("black", "grey", "grey", "purple"),#if you want to change colors
colAlpha = 0.5,
title=paste(Signature_term),
subtitle = bquote(italic("Metabolic Pathway")),
caption = paste0("total = ", nrow(Volcano1), " genes of ", nrow(Pathway), " genes in pathway"),
xlim = c((((Reduce(min,Volcano1$Log2FC))))-0.5,(((Reduce(max,Volcano1$Log2FC))))+0.5),
ylim = c(0,((-log10(Reduce(min,Volcano1$p.adj))))+0.1),
#drawConnectors = TRUE,
#widthConnectors = 0.5,
#colConnectors = "black",
cutoffLineType = "dashed",
cutoffLineCol = "black",
cutoffLineWidth = 0.5,
legendPosition = 'right',
legendLabSize = 12,
legendIconSize = 5.0
)
ggsave(file=paste("Output_Figures/MetabolicPathways/VolcanoPlot_GAUDE", Signature_term, ".pdf", sep="_"), plot=VolcanoPlot, width=10, height=8)
plot(VolcanoPlot)
}
}
#Make an example DF:
DF <- read.csv("Input_GeneNames/GeneNames.csv")
DF$Log2FC <- runif(17021, min=-5, max=5)
DF$p.adj <- runif(17021, min=0, max=0.1)
#Remove Doublons in gene list:
RemoveDublons <-function(MyData){
MyData <- MyData[complete.cases(MyData),]
doublons <- as.character(MyData[duplicated(MyData$Gene),"Gene"])
print("Number of duplicated genes:")
print(length(doublons))
# Keep the entry with the greatest Log2FC:
MyData$absLog2FC <- abs(MyData$Log2FC)
MyData <- MyData[ order(MyData$absLog2FC), ]
MyData_Select <- MyData[!duplicated(MyData$Gene),]
#Safe:
OutputFileName <- MyData_Select
}#Remove duplicated genes: Function
DF_ND <- RemoveDublons(MyData= DF)
## [1] "Number of duplicated genes:"
## [1] 1353
#Load the Metabolic Pathways from gaude et al:
Metabolic_Signature <-read.csv("Input_MetabolicPathways_Gaude/41467_2016_BFncomms13041_MOESM340_ESM.csv")
Correction_Metabolic_Signature <- read.csv("Input_MetabolicPathways_Gaude/41467_2016_BFncomms13041_MOESM341_ESM.csv")%>%
mutate(Unique = case_when(associated_Pathways =="1" ~ 'Unique',
TRUE ~ 'In multiple Pathways'))
Metabolic_Signature <-merge(x=Metabolic_Signature, y=Correction_Metabolic_Signature, by.x ="gene", by.y="external_gene_name", all.x=TRUE)
#Make the plots:
Pathway_Names <- Metabolic_Signature[!duplicated(Metabolic_Signature$term),]
Pathway_Names <- Pathway_Names$term
for (i in Pathway_Names){
VolcanoPlot_MetabolicPathway(Input=DF_ND,
Signature= Metabolic_Signature,
Signature_term=i)
}
sessionInfo()
## R version 4.1.3 (2022-03-10)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19042)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_Germany.1252 LC_CTYPE=English_Germany.1252
## [3] LC_MONETARY=English_Germany.1252 LC_NUMERIC=C
## [5] LC_TIME=English_Germany.1252
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] EnhancedVolcano_1.12.0 ggrepel_0.9.1 wakefield_0.3.6
## [4] rmarkdown_2.13 forcats_0.5.1 stringr_1.4.0
## [7] dplyr_1.0.7 purrr_0.3.4 readr_2.1.2
## [10] tidyr_1.2.0 tibble_3.1.6 ggplot2_3.3.5
## [13] tidyverse_1.3.1
##
## loaded via a namespace (and not attached):
## [1] httr_1.4.2 sass_0.4.0 maps_3.4.0 jsonlite_1.8.0
## [5] modelr_0.1.8 bslib_0.3.1 assertthat_0.2.1 highr_0.9
## [9] vipor_0.4.5 cellranger_1.1.0 yaml_2.3.5 Rttf2pt1_1.3.10
## [13] pillar_1.7.0 backports_1.4.1 glue_1.6.0 extrafontdb_1.0
## [17] digest_0.6.29 RColorBrewer_1.1-2 rvest_1.0.2 colorspace_2.0-3
## [21] htmltools_0.5.2 pkgconfig_2.0.3 broom_0.7.12 haven_2.4.3
## [25] scales_1.1.1 ggrastr_1.0.1 tzdb_0.2.0 farver_2.1.0
## [29] generics_0.1.2 ellipsis_0.3.2 withr_2.5.0 cli_3.1.0
## [33] magrittr_2.0.1 crayon_1.5.0 readxl_1.3.1 evaluate_0.15
## [37] ash_1.0-15 fs_1.5.2 fansi_0.5.0 MASS_7.3-55
## [41] xml2_1.3.3 beeswarm_0.4.0 textshaping_0.3.6 tools_4.1.3
## [45] hms_1.1.1 lifecycle_1.0.1 munsell_0.5.0 reprex_2.0.1
## [49] compiler_4.1.3 ggalt_0.4.0 jquerylib_0.1.4 systemfonts_1.0.4
## [53] rlang_1.0.2 grid_4.1.3 rstudioapi_0.13 labeling_0.4.2
## [57] proj4_1.0-11 gtable_0.3.0 DBI_1.1.2 R6_2.5.1
## [61] lubridate_1.8.0 knitr_1.37 fastmap_1.1.0 extrafont_0.17
## [65] utf8_1.2.2 ragg_1.2.2 KernSmooth_2.23-20 stringi_1.7.6
## [69] ggbeeswarm_0.6.0 Rcpp_1.0.8 vctrs_0.3.8 dbplyr_2.1.1
## [73] tidyselect_1.1.2 xfun_0.30